clear all;
close all;

%  =======================================================================================================================================================
% 
%  Code Description: 
%  This codefile constructs the dataset underlying the liquidity management regressions presented in Section 3.2.2 (Flow-Driven Liquidity Management)
%  and Table 4.
% 
%  =======================================================================================================================================================
% 
%  Major output:
%  - dataset underlying the liquidity management regressions and Table 4
%  =======================================================================================================================================================
% 
%  General disclaimer:
%  This file directory produces replication code for "Connected Funds". 
%  Because we cannot share the underlying data provided by the Bundesbank's Research Data and Service Centre (RDSC) and other subscription data sources, 
%  we have included pseudo data to show how the raw data are formatted. 
%  Other researchers can go through a similar approval and subscription process to obtain the underlying data. (2023-04-06)
% 
%  =======================================================================================================================================================


%% Set project directory

projectPath = 'C:\ConnectedFunds_Codebase\'

% Add functions folders
addpath(strcat(projectPath, 'Code\matlab_functions'));

cd 'C:\ConnectedFunds_Codebase\Code\'


%  ============== Construct dataset on holdings changes of German investment funds (differentiate between corp. bonds, sov. bonds, equities and fund shares) =================


% Load fund data
load(strcat(projectPath, 'Data\IFS\IFS_All_CleanData'));

% Filter fund data
IFS_All(IFS_All.artmittel == 4, :)  = [];
IFS_All(IFS_All.artmittel == 6, :)  = [];
IFS_All(IFS_All.artmittel > 7, :)   = [];
IFS_All(IFS_All.etf == 1, :)        = [];

All_Results_DeltaHoldings = [];

% Loop over time and build dataset on holdings changes
for t = 2 : length(umonth)
    
    umonth(t),

    % Get funds' holdings in (t-1) 
    load(strcat(projectPath, 'Data\IFS\mat\IFS_Holdings_', num2str(umonth(t-1)), '.mat'));
    IFS_Holdings_1     = IFS_Holdings;
    IFS_Holdings_1(IFS_Holdings_1.AMOUNT<0,:) = [];
    IFS_Holdings_1.SECPRICE(startsWith(IFS_Holdings_1.SECQUOT,"P")) = IFS_Holdings_1.SECPRICE(startsWith(IFS_Holdings_1.SECQUOT,"P"))./100;

    % Get funds' holdings in t   
    load(strcat(projectPath, 'Data\IFS\mat\IFS_Holdings_',num2str(umonth(t)),'.mat'));
    IFS_Holdings_2     = IFS_Holdings;
    IFS_Holdings_2(IFS_Holdings_2.AMOUNT<0,:) = [];
    IFS_Holdings_2.SECPRICE(startsWith(IFS_Holdings_2.SECQUOT,"P")) = IFS_Holdings_2.SECPRICE(startsWith(IFS_Holdings_2.SECQUOT,"P"))./100;

    clear IFS_Holdings;

    % Keep funds that report in both months
    ufund  = intersect(IFS_Holdings_1.ISIN, IFS_Holdings_2.ISIN);
    [~,idx] = ismember(IFS_Holdings_1.ISIN,ufund); IFS_Holdings_1(idx==0,:) = [];
    [~,idx] = ismember(IFS_Holdings_2.ISIN,ufund); IFS_Holdings_2(idx==0,:) = [];

    % Keep assets that were reported in both months;
    uasset = intersect(IFS_Holdings_1.SECCODE, IFS_Holdings_2.SECCODE);
    [~,idx] = ismember(IFS_Holdings_1.SECCODE,uasset); IFS_Holdings_1(idx==0,:) = [];
    [~,idx] = ismember(IFS_Holdings_2.SECCODE,uasset); IFS_Holdings_2(idx==0,:) = [];
    clear idx

    ufund  = intersect(IFS_Holdings_1.ISIN, IFS_Holdings_2.ISIN);
    [~,idx] = ismember(IFS_Holdings_1.ISIN,ufund); IFS_Holdings_1(idx==0,:) = [];
    [~,idx] = ismember(IFS_Holdings_2.ISIN,ufund); IFS_Holdings_2(idx==0,:) = [];

    uasset = intersect(IFS_Holdings_1.SECCODE, IFS_Holdings_2.SECCODE); 
    ufund  = intersect(IFS_Holdings_1.ISIN, IFS_Holdings_2.ISIN);

    % Merge security-level information from CSDB
    load(strcat(projectPath, 'Data\CSDB\mat\CSDB_',num2str(umonth(t)),'.mat'));
    CSDB_2 = sortrows(CSDB,'ISIN','ascend');
    clear CSDB

    [~,idx]             = ismember(CSDB_2.ISIN,uasset);
    CSDB_2(idx==0,:)    = [];
    [~,idx]             = ismember(uasset,CSDB_2.ISIN);
    uasset(idx==0)      = [];
    clear idx;

    load(strcat(projectPath, 'Data\CSDB\mat\CSDB_',num2str(umonth(t-1)),'.mat'));
    CSDB_1 = sortrows(CSDB,'ISIN','ascend');
    clear CSDB;

    CSDB_2.PRICE_Lag            = NaN(height(CSDB_2),1);
    [~,idx]                     = ismember(CSDB_2.ISIN,CSDB_1.ISIN);
    CSDB_2.PRICE_Lag(idx>0,:)   = CSDB_1.PRICE(idx(idx>0));

    CSDB_2.Return                           = (CSDB_2.PRICE - CSDB_2.PRICE_Lag)./CSDB_2.PRICE_Lag;
    CSDB_2.Return(abs(CSDB_2.Return)==Inf)  = NaN;
    ql                                      = quantile(CSDB_2.Return,0.01);
    qh                                      = quantile(CSDB_2.Return,1-0.01);
    CSDB_2.Return(CSDB_2.Return > qh)       = qh;
    CSDB_2.Return(CSDB_2.Return < ql)       = ql;
    clear CSDB_1;

    [~,idxAsset_1] = ismember(IFS_Holdings_1.SECCODE,uasset);
    IFS_Holdings_1(idxAsset_1 == 0,:) = []; idxAsset_1(idxAsset_1==0) = [];
    [~,idxAsset_2] = ismember(IFS_Holdings_2.SECCODE,uasset);
    IFS_Holdings_2(idxAsset_2 == 0,:) = []; idxAsset_2(idxAsset_2==0) = [];

    [~,idxFund_1]  = ismember(IFS_Holdings_1.ISIN,ufund);
    [~,idxFund_2]  = ismember(IFS_Holdings_2.ISIN,ufund);

    % Aggregate asset-level prices and holdings
    AssetPrice_1       = accumarray([idxFund_1 idxAsset_1],IFS_Holdings_1.SECPRICE,[length(ufund) length(uasset)],'', 0);
    TotalHoldingsNum_1 = accumarray([idxFund_1 idxAsset_1],IFS_Holdings_1.SECNOMUN,[length(ufund) length(uasset)],'', 0);

    AssetPrice_2       = accumarray([idxFund_2 idxAsset_2],IFS_Holdings_2.SECPRICE,[length(ufund) length(uasset)],'', 0);
    TotalHoldingsNum_2 = accumarray([idxFund_2 idxAsset_2],IFS_Holdings_2.SECNOMUN,[length(ufund) length(uasset)],'', 0);

    % Total holdings
    TotalHoldings_1          = (TotalHoldingsNum_1 .* AssetPrice_1);
    TotalHoldings_2          = (TotalHoldingsNum_2 .* AssetPrice_2);
    TotalHoldings_2_PriceLag = TotalHoldings_2./repmat((1 + CSDB_2.Return'),size(TotalHoldings_1,1),1);

    AssetType = 1*startsWith(CSDB_2.ESA_INS_1995,'F.3') + ...
                2*startsWith(CSDB_2.ESA_INS_1995,'F.51') + ...
                3*startsWith(CSDB_2.ESA_INS_1995,'F.52');
    AssetSov  = startsWith(CSDB_2.ESA_ISSUER_1995,'S.13');
    AssetGerman = startsWith(CSDB_2.NAT_INS_CODE_TYPE,'DE');

    % Total holdings changes
    DeltaHoldings                  = (TotalHoldings_2 - TotalHoldings_1);
    DeltaHoldings_NoPriceEffect    = TotalHoldings_2_PriceLag - TotalHoldings_1;
    DeltaHoldings_PriceEffect      = TotalHoldings_2 - TotalHoldings_2_PriceLag;
    
    % Export data to csv
    [i,j]       = find(TotalHoldings_1 + TotalHoldings_2 > 0);
    k           = find(TotalHoldings_1 + TotalHoldings_2 > 0);
    tmp_data    = table(repmat(umonth(t),length(k),1),...
                     ufund(i),...
                     uasset(j),...
                     TotalHoldings_1(k),...
                     TotalHoldings_2(k),...
                     TotalHoldings_2_PriceLag(k), ...
                     DeltaHoldings(k),...
                     DeltaHoldings_NoPriceEffect(k),...
                     DeltaHoldings_PriceEffect(k),...
                     CSDB_2.Return(j),...
                     AssetType(j),...
                     AssetSov(j));
    tmp_data.Properties.VariableNames{1} = 'DATUM';
    tmp_data.Properties.VariableNames{2} = 'ISIN';
    tmp_data.Properties.VariableNames{3} = 'WPKENN';
    tmp_data.Properties.VariableNames{4} = 'lag_holdings';
    tmp_data.Properties.VariableNames{5} = 'holdings';
    tmp_data.Properties.VariableNames{6} = 'holdings_lagprice';
    tmp_data.Properties.VariableNames{7} = 'delta_holdings';
    tmp_data.Properties.VariableNames{8} = 'delta_holdings_price';
    tmp_data.Properties.VariableNames{9} = 'delta_holdings_volume';
    tmp_data.Properties.VariableNames{10} = 'asset_return';
    tmp_data.Properties.VariableNames{11} = 'asset_type';
    tmp_data.Properties.VariableNames{12} = 'asset_isSovereign';
    writetable(tmp_data, strcat(projectPath, 'Data\JiangEtAl\tmp_data_',num2str(t-1)));    
    
    % Compute holdings changes by asset class
    DeltaHoldings_SovBond                = nansum(DeltaHoldings(:,AssetType == 1 & AssetSov == 1)')';
    DeltaHoldings_PriceEffect_SovBond    = nansum(DeltaHoldings_PriceEffect(:,AssetType == 1 & AssetSov == 1)')';
    DeltaHoldings_NoPriceEffect_SovBond  = nansum(DeltaHoldings_NoPriceEffect(:,AssetType == 1 & AssetSov == 1)')';
    TotalHoldings_SovBond_1              = nansum(TotalHoldings_1(:,AssetType == 1 & AssetSov == 1)')';
    TotalHoldings_SovBond_2              = nansum(TotalHoldings_2(:,AssetType == 1 & AssetSov == 1)')';

    DeltaHoldings_CorpBond                = nansum(DeltaHoldings(:,AssetType == 1 & AssetSov == 0)')';
    DeltaHoldings_PriceEffect_CorpBond    = nansum(DeltaHoldings_PriceEffect(:,AssetType == 1 & AssetSov == 0)')';
    DeltaHoldings_NoPriceEffect_CorpBond  = nansum(DeltaHoldings_NoPriceEffect(:,AssetType == 1 & AssetSov == 0)')';
    TotalHoldings_CorpBond_1              = nansum(TotalHoldings_1(:,AssetType == 1 & AssetSov == 0)')';
    TotalHoldings_CorpBond_2              = nansum(TotalHoldings_2(:,AssetType == 1 & AssetSov == 0)')';
    
    DeltaHoldings_Equities                = nansum(DeltaHoldings(:,AssetType == 2)')';
    DeltaHoldings_PriceEffect_Equities    = nansum(DeltaHoldings_PriceEffect(:,AssetType == 2)')';
    DeltaHoldings_NoPriceEffect_Equities  = nansum(DeltaHoldings_NoPriceEffect(:,AssetType == 2)')';
    TotalHoldings_Equities_1              = nansum(TotalHoldings_1(:,AssetType == 2)')';
    TotalHoldings_Equities_2              = nansum(TotalHoldings_2(:,AssetType == 2)')';
    
    DeltaHoldings_FundShares                = nansum(DeltaHoldings(:,AssetType == 3)')';
    DeltaHoldings_PriceEffect_FundShares    = nansum(DeltaHoldings_PriceEffect(:,AssetType == 3)')';
    DeltaHoldings_NoPriceEffect_FundShares  = nansum(DeltaHoldings_NoPriceEffect(:,AssetType == 3)')';
    TotalHoldings_FundShares_1              = nansum(TotalHoldings_1(:,AssetType == 3)')';
    TotalHoldings_FundShares_2              = nansum(TotalHoldings_2(:,AssetType == 3)')';    
    
    % Compute overall holdings changes (aggregate results)
    DeltaHoldings               = nansum(DeltaHoldings')';
    DeltaHoldings_PriceEffect   = nansum(DeltaHoldings_PriceEffect')';
    DeltaHoldings_NoPriceEffect = nansum(DeltaHoldings_NoPriceEffect')';
    TotalHoldings_1             = nansum(TotalHoldings_1')';
    TotalHoldings_2             = nansum(TotalHoldings_2')';
  
    
    % Build dataset on holdings changes
    date = umonth(t);
    
    tmp = table(date*ones(length(ufund),1),ufund,...
                TotalHoldings_1,TotalHoldings_2,DeltaHoldings,DeltaHoldings_PriceEffect,DeltaHoldings_NoPriceEffect,...
                TotalHoldings_SovBond_1,TotalHoldings_SovBond_2,DeltaHoldings_SovBond,DeltaHoldings_PriceEffect_SovBond,DeltaHoldings_NoPriceEffect_SovBond,...
                TotalHoldings_CorpBond_1,TotalHoldings_CorpBond_2,DeltaHoldings_CorpBond,DeltaHoldings_PriceEffect_CorpBond,DeltaHoldings_NoPriceEffect_CorpBond,...
                TotalHoldings_Equities_1,TotalHoldings_Equities_2,DeltaHoldings_Equities,DeltaHoldings_PriceEffect_Equities,DeltaHoldings_NoPriceEffect_Equities,...
                TotalHoldings_FundShares_1,TotalHoldings_FundShares_2,DeltaHoldings_FundShares,DeltaHoldings_PriceEffect_FundShares,DeltaHoldings_NoPriceEffect_FundShares);

    if t == 2
       All_Results_DeltaHoldings = tmp;
    else
       All_Results_DeltaHoldings = [All_Results_DeltaHoldings; tmp];
    end

    % Do some housekeeping
    keep umonth t All_Results_DeltaHoldings;
end

% Again, do some housekeeping
keep All_Results_DeltaHoldings;

% Save data on holdings changes for further analyses
save(strcat(projectPath, 'Data\JiangEtAl\All_Results_DeltaHoldings'));